This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
# Load the click-result scores from CSV and display them.
scores_loc <- read.table(
  file = "/Users/jialincheoh/Downloads/overall_score_click_result.csv",
  header = TRUE,
  sep = ","
)
scores_loc

# Keep only the group code and the phase-5 novelty score.
scores_novelty <- scores_loc[c("Group", "novelty.phase5")]
scores_novelty

# Brown-Forsythe test of novelty.phase5 across groups. `charac` holds the
# group codes as character labels so they are treated as categories; it is a
# free-standing vector (not a column of scores_novelty) and is reused later.
library(onewaytests)
charac <- as.character(scores_novelty[["Group"]])
bf.test(novelty.phase5 ~ charac, data = scores_novelty)
Brown-Forsythe Test (alpha = 0.05)
-------------------------------------------------------------
data : novelty.phase5 and charac
statistic : 1.05192
num df : 3
denom df : 159.3847
p.value : 0.3713216
Result : Difference is not statistically significant.
-------------------------------------------------------------
# Linear model of phase-5 novelty on Group, followed by the standard
# diagnostic plots and a studentized Breusch-Pagan heteroscedasticity test.
# NOTE(review): Group enters as a numeric predictor (the recorded tests report
# df = 1 and the summary a single Group slope) -- confirm a linear trend over
# the group codes is intended rather than factor(Group).
lmnovelty <- lm(formula = novelty.phase5 ~ Group, data = scores_novelty)
plot(lmnovelty)
lmtest::bptest(lmnovelty)
studentized Breusch-Pagan test
data: lmnovelty
BP = 0.12658, df = 1, p-value = 0.722
# Score test for non-constant error variance of the novelty model, using the
# fitted values as the variance formula (see the recorded output).
car::ncvTest(lmnovelty)
Non-constant Variance Score Test
Variance formula: ~ fitted.values
Chisquare = 0.0376888, Df = 1, p = 0.84607
# Shapiro-Wilk normality test on the residuals of lmnovelty.
# The recorded run rejects normality (p = 5.659e-08); the author flags this
# as a serious problem for the linear model.
shapiro.test(residuals(lmnovelty))
Shapiro-Wilk normality test
data: residuals(lmnovelty)
W = 0.91828, p-value = 5.659e-08
# Box-Cox needs a strictly positive response, so exact zeros are swapped for
# a tiny positive placeholder before profiling the likelihood.
scores_novelty$novelty.phase5 <- replace(
  scores_novelty$novelty.phase5,
  scores_novelty$novelty.phase5 == 0,
  1e-11
)

library(MASS)

# Profile the Box-Cox log-likelihood for lambda in [-3, 3] and keep the
# lambda at which it peaks.
bcmle <- boxcox(
  lm(novelty.phase5 ~ Group, data = scores_novelty),
  lambda = seq(from = -3, to = 3, by = 0.1)
)
lambda <- with(bcmle, x[which.max(y)])
lambda
[1] 0.09090909
# Show the novelty scores after the zero replacement above.
scores_novelty[["novelty.phase5"]]
[1] 5.0000e+01 6.7500e+01 2.5000e+01 1.6670e+01 8.7500e+01 5.0000e+01 1.0000e-11 6.0000e+01 6.6670e+01 6.8750e+01 6.2500e+01
[12] 5.2500e+01 6.6670e+01 1.0000e-11 2.5000e+01 7.5000e+01 2.5000e+01 2.5000e+01 7.5000e+01 1.0000e-11 8.7500e+01 1.0000e-11
[23] 2.5000e+01 7.5000e+01 3.7500e+01 2.5000e+01 6.2500e+01 7.5000e+01 1.0000e-11 1.0625e+02 7.5000e+01 5.8330e+01 1.0000e-11
[34] 1.0000e-11 1.0000e-11 1.0000e-11 1.0000e-11 1.0000e-11 1.0000e-11 2.5000e+01 1.0000e-11 6.2500e+01 1.0000e-11 2.5000e+01
[45] 1.0000e-11 2.5000e+01 6.2500e+01 1.0000e+02 1.0000e-11 8.2500e+01 8.7500e+01 6.2500e+01 7.7500e+01 8.2500e+01 7.0000e+01
[56] 5.0000e+01 9.1670e+01 5.6250e+01 8.5000e+01 8.7500e+01 2.5000e+01 1.0000e-11 5.0000e+01 1.0000e-11 7.5000e+01 8.7500e+01
[67] 7.5000e+01 2.5000e+01 5.2500e+01 6.2500e+01 1.0000e-11 6.6670e+01 7.2500e+01 9.5000e+01 8.1250e+01 2.5000e+01 1.0000e-11
[78] 1.0000e-11 1.0000e-11 1.0000e-11 1.0000e-11 1.0000e-11 2.5000e+01 2.5000e+01 8.0000e+01 8.7500e+01 7.5000e+01 2.5000e+01
[89] 2.5000e+01 1.0000e-11 6.2500e+01 2.5000e+01 6.6670e+01 8.7500e+01 6.0000e+01 2.5000e+01 6.5000e+01 8.4380e+01 7.2500e+01
[100] 1.0000e-11 6.2500e+01 6.2500e+01 1.0000e+02 3.7500e+01 2.5000e+01 7.5000e+01 2.5000e+01 1.0000e-11 7.5000e+01 1.0000e-11
[111] 6.2500e+01 7.5000e+01 6.5630e+01 1.0000e+02 2.5000e+01 3.3330e+01 1.2500e+01 7.0000e+01 1.0000e-11 1.0000e-11 1.0000e-11
[122] 1.0000e-11 8.7500e+01 9.2500e+01 1.5000e+01 5.6250e+01 2.5000e+01 9.5000e+01 1.0000e-11 1.0000e-11 7.2500e+01 5.6250e+01
[133] 2.5000e+01 2.5000e+01 6.0000e+01 1.0000e+02 8.1250e+01 5.0000e+01 6.6670e+01 7.5000e+01 3.7500e+01 5.0000e+01 6.7500e+01
[144] 5.8330e+01 2.5000e+01 2.5000e+01 1.0000e-11 8.1250e+01 8.7500e+01 1.2500e+01 5.6250e+01 9.3750e+01 5.7500e+01 5.6250e+01
[155] 1.0000e-11 8.3330e+01 5.7500e+01 8.1250e+01 9.0000e+01 7.5000e+01 1.0000e-11 1.0000e-11 1.0000e-11 1.0000e-11
# Refit the novelty model with the response raised to the Box-Cox power and
# re-check residual normality.
# The exponent is now the `lambda` estimated by boxcox() instead of the
# hard-coded 0.1010101, which did not match the estimate printed for this data
# (0.09090909). The Shapiro-Wilk output recorded below was produced with the
# old exponent; re-run to refresh it.
fullmodel.inv <- lm(novelty.phase5^lambda ~ Group, data = scores_novelty)
plot(fullmodel.inv)
# NOTE(review): the bare NA below looks like a notebook artifact (it only
# prints NA) -- confirm and remove.
NA
shapiro.test(residuals(fullmodel.inv))
Shapiro-Wilk normality test
data: residuals(fullmodel.inv)
W = 0.68977, p-value < 2.2e-16
# Coefficient table and fit statistics for the untransformed novelty model.
summary(lmnovelty)
Call:
lm(formula = novelty.phase5 ~ Group, data = scores_novelty)
Residuals:
Min 1Q Median 3Q Max
-49.629 -37.759 7.024 28.697 66.600
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 39.650 4.410 8.990 6.19e-16 ***
Group 3.327 2.349 1.416 0.159
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 33.47 on 162 degrees of freedom
Multiple R-squared: 0.01223, Adjusted R-squared: 0.006129
F-statistic: 2.005 on 1 and 162 DF, p-value: 0.1587
# Regress the overall phase-5 score on Group and print the fit summary.
# NOTE(review): Group is used as a numeric predictor (one slope in the
# recorded summary) -- confirm that is intended rather than factor(Group).
lmtotal <- lm(formula = total.phase5 ~ Group, data = scores_loc)
summary(lmtotal)
Call:
lm(formula = total.phase5 ~ Group, data = scores_loc)
Residuals:
Min 1Q Median 3Q Max
-217.188 -80.067 6.134 100.438 188.822
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 164.098 14.303 11.473 <2e-16 ***
Group 17.697 7.619 2.323 0.0214 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 108.5 on 162 degrees of freedom
Multiple R-squared: 0.03223, Adjusted R-squared: 0.02625
F-statistic: 5.395 on 1 and 162 DF, p-value: 0.02144
# Same total.phase5 ~ Group fit and summary as the chunk before it; kept so
# `lmtotal` exists when this chunk is run on its own.
lmtotal <- lm(formula = total.phase5 ~ Group, data = scores_loc)
summary(lmtotal)
Call:
lm(formula = total.phase5 ~ Group, data = scores_loc)
Residuals:
Min 1Q Median 3Q Max
-217.188 -80.067 6.134 100.438 188.822
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 164.098 14.303 11.473 <2e-16 ***
Group 17.697 7.619 2.323 0.0214 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 108.5 on 162 degrees of freedom
Multiple R-squared: 0.03223, Adjusted R-squared: 0.02625
F-statistic: 5.395 on 1 and 162 DF, p-value: 0.02144
# Diagnostic plots for the total-score model, then a Shapiro-Wilk normality
# test on its residuals (the recorded run rejects normality, p = 0.0001067).
plot(lmtotal)
shapiro.test(residuals(lmtotal))
Shapiro-Wilk normality test
data: residuals(lmtotal)
W = 0.95955, p-value = 0.0001067
# Wilcoxon rank-sum test of total.phase5 between Group 2 and Group 3.
# The earlier call passed the score vector and the group codes as the two
# samples, so it tested whether scores differ from the labels themselves
# (hence p < 2.2e-16); its `data = bogota_23` argument had no effect on that
# call, and bogota_23 is only created in a later chunk.
# NOTE(review): the 2-vs-3 restriction is inferred from that `data` argument
# -- confirm. The output recorded below predates this change; re-run it.
wilcox.test(total.phase5 ~ Group, data = scores_loc,
            subset = Group %in% c(2, 3))
Wilcoxon rank sum test with continuity correction
data: scores_loc$total.phase5 and scores_loc$Group
W = 24664, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
# Rank-based comparison of novelty.phase5 across the groups.
# The earlier wilcox.test(Group, score) call treated the group codes and the
# scores as two samples, which compares labels with scores rather than groups
# with each other. scores_loc holds four groups (0-3), so Kruskal-Wallis is
# used instead. The output recorded below predates this change; re-run it.
kruskal.test(novelty.phase5 ~ Group, data = scores_loc)
Wilcoxon rank sum test with continuity correction
data: scores_loc$Group and scores_loc$novelty.phase5
W = 5904, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
# Rank-based comparison of user.requirement.phase5 across the groups.
# The earlier wilcox.test(Group, score) call treated the group codes and the
# scores as two samples, which compares labels with scores rather than groups
# with each other. scores_loc holds four groups (0-3), so Kruskal-Wallis is
# used instead. The output recorded below predates this change; re-run it.
kruskal.test(user.requirement.phase5 ~ Group, data = scores_loc)
Wilcoxon rank sum test with continuity correction
data: scores_loc$Group and scores_loc$user.requirement.phase5
W = 6048, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
# Rank-based comparison of tech.phase5 across the groups.
# The earlier wilcox.test(Group, score) call treated the group codes and the
# scores as two samples, which compares labels with scores rather than groups
# with each other. scores_loc holds four groups (0-3), so Kruskal-Wallis is
# used instead. The output recorded below predates this change; re-run it.
kruskal.test(tech.phase5 ~ Group, data = scores_loc)
Wilcoxon rank sum test with continuity correction
data: scores_loc$Group and scores_loc$tech.phase5
W = 3312, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
# Pairwise Wilcoxon rank-sum tests of total.phase5 between every pair of
# groups, with Benjamini-Hochberg ("BH") adjusted p-values. Tied scores make
# exact p-values unavailable, hence the warnings in the recorded output.
pairwise.wilcox.test(scores_loc$total.phase5, scores_loc$Group,
p.adjust.method = "BH")
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Pairwise comparisons using Wilcoxon rank sum test with continuity correction
data: scores_loc$total.phase5 and scores_loc$Group
0 1 2
1 0.33 - -
2 0.33 0.93 -
3 0.11 0.37 0.33
P value adjustment method: BH
# Kruskal-Wallis rank-sum test of total.phase5 across all groups.
kruskal.test(total.phase5 ~ Group, data = scores_loc)
Kruskal-Wallis rank sum test
data: total.phase5 by Group
Kruskal-Wallis chi-squared = 5.949, df = 3, p-value = 0.1141
# Restrict the data to Groups 2 and 3, display the subset, and compare
# novelty.phase5 between the two groups with a Kruskal-Wallis test.
bogota_23 <- scores_loc[scores_loc$Group %in% c(2, 3), ]
bogota_23
kruskal.test(novelty.phase5 ~ Group, data = bogota_23)
Kruskal-Wallis rank sum test
data: novelty.phase5 by Group
Kruskal-Wallis chi-squared = 0.85845, df = 1, p-value = 0.3542
# Wilcoxon rank-sum test of total.phase5 between Group 2 and Group 3.
# The earlier call passed the group codes and the scores as the two samples,
# which compares labels with scores (hence p < 2.2e-16) instead of comparing
# the groups; the formula interface splits the scores by Group.
# The output recorded below predates this change; re-run to refresh it.
wilcox.test(total.phase5 ~ Group, data = bogota_23)
Wilcoxon rank sum test with continuity correction
data: bogota_23$Group and bogota_23$total.phase5
W = 581, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
# Per-group Shapiro-Wilk normality test of novelty.phase5.
# NOTE(review): `charac` is the character vector built from
# scores_novelty$Group in an earlier chunk; unless scores_loc has a column of
# that name it is taken from the workspace -- verify it is row-aligned with
# scores_loc.
library(AID)
library(onewaytests)
nor.test(novelty.phase5 ~ charac, data = scores_loc)
Shapiro-Wilk Normality Test (alpha = 0.05)
--------------------------------------------------
data : novelty.phase5 and charac
--------------------------------------------------
# Fligner-Killeen test of variance homogeneity for total.phase5 across the
# group labels in `charac`.
homog.test(total.phase5 ~ charac, data = scores_loc, method = "Fligner")
Fligner-Killeen Homogeneity Test (alpha = 0.05)
---------------------------------------------------
data : total.phase5 and charac
statistic : 0.3248996
parameter : 3
p.value : 0.9552795
Result : Variances are homogeneous.
---------------------------------------------------
# Mark exact-zero totals with a tiny positive placeholder.
scores_loc$total.phase5 <- replace(
  scores_loc$total.phase5,
  scores_loc$total.phase5 == 0,
  1e-11
)

# Box-Cox search for novelty.phase5 by Group.
# NOTE(review): novelty.phase5 in scores_loc is not given the zero
# placeholder in this chunk -- confirm boxcoxfr() accepts it as is.
out <- boxcoxfr(scores_loc$novelty.phase5, scores_loc$Group)

# Despite its name, `zero` keeps the rows whose total is NOT the placeholder,
# i.e. the participants with a non-zero total score.
zero <- scores_loc[!(scores_loc$total.phase5 == 1e-11), ]
zero

# Refit the total-score model on the non-zero rows and plot its diagnostics.
lmtotal_revise <- lm(formula = total.phase5 ~ Group, data = zero)
plot(lmtotal_revise)
# Per-group Shapiro-Wilk normality test of total.phase5 on the `zero` subset
# (rows with a non-zero total), with Group treated as a categorical label.
library(AID)
library(onewaytests)
nor.test(total.phase5 ~ as.character(Group), data = zero)
Shapiro-Wilk Normality Test (alpha = 0.05)
--------------------------------------------------
data : total.phase5 and as.character(Group)
--------------------------------------------------
# Fligner-Killeen test of variance homogeneity for total.phase5 across groups
# on the `zero` subset.
homog.test(total.phase5 ~ as.character(Group), data = zero, method = "Fligner")
Fligner-Killeen Homogeneity Test (alpha = 0.05)
---------------------------------------------------
data : total.phase5 and as.character(Group)
statistic : 2.606498
parameter : 3
p.value : 0.4563514
Result : Variances are homogeneous.
---------------------------------------------------
# Box-Cox transformation search for total.phase5 by Group on the `zero`
# subset; the result is meant to be stored in `out`.
# NOTE(review): the recorded run stops with "Feasible region is null set", so
# `out` is not updated by this line -- widen `lambda` or lower `tau` as the
# error message suggests, or drop the transformation.
out <- boxcoxfr(zero$total.phase5, zero$Group)
Error in boxcoxfr(zero$total.phase5, zero$Group) :
Feasible region is null set. No solution.
Try to enlarge the range of feasible lambda values, lambda.
Try to decrease feasible region parameter, tau.
# One-way ANOVA of total.phase5 across groups on the `zero` subset; the test
# object is kept in `result`.
result<-aov.test(total.phase5 ~ as.character(Group), data = zero)
One-Way Analysis of Variance (alpha = 0.05)
-------------------------------------------------------------
data : total.phase5 and as.character(Group)
statistic : 1.025466
num df : 3
denom df : 142
p.value : 0.3833417
Result : Difference is not statistically significant.
-------------------------------------------------------------
# Pairwise Wilcoxon rank-sum tests of total.phase5 between groups on the
# `zero` subset, with Benjamini-Hochberg ("BH") adjusted p-values.
pairwise.wilcox.test(zero$total.phase5, zero$Group,
p.adjust.method = "BH")
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Pairwise comparisons using Wilcoxon rank sum test with continuity correction
data: zero$total.phase5 and zero$Group
0 1 2
1 0.71 - -
2 0.71 0.93 -
3 0.51 0.51 0.51
P value adjustment method: BH
# Kruskal-Wallis rank-sum test of total.phase5 across groups on the `zero`
# subset.
kruskal.test(total.phase5 ~ Group, data = zero)
Kruskal-Wallis rank sum test
data: total.phase5 by Group
Kruskal-Wallis chi-squared = 3.2169, df = 3, p-value = 0.3594
# p-value of the Wilcoxon rank-sum test of total.phase5 between Groups 2 and 3.
# The earlier call used the scores and the group codes as the two samples,
# comparing scores with labels rather than one group with the other; the
# formula interface splits the scores by Group.
# The value recorded below predates this change; re-run to refresh it.
wilcox.test(total.phase5 ~ Group, data = bogota_23)$p.value
[1] 5.729537e-21
# p-value of the Wilcoxon rank-sum test of novelty.phase5 between Groups 2
# and 3. The earlier call used the scores and the group codes as the two
# samples, comparing scores with labels rather than one group with the other;
# the formula interface splits the scores by Group.
# The value recorded below predates this change; re-run to refresh it.
wilcox.test(novelty.phase5 ~ Group, data = bogota_23)$p.value
[1] 2.438505e-11
# p-value of the Wilcoxon rank-sum test of user.requirement.phase5 between
# Groups 2 and 3. The earlier call used the scores and the group codes as the
# two samples, comparing scores with labels rather than one group with the
# other; the formula interface splits the scores by Group.
# The value recorded below predates this change; re-run to refresh it.
wilcox.test(user.requirement.phase5 ~ Group, data = bogota_23)$p.value
[1] 4.279913e-09
# p-value of the Wilcoxon rank-sum test of infovis.phase5 between Groups 2
# and 3. The earlier call used the scores and the group codes as the two
# samples, comparing scores with labels rather than one group with the other;
# the formula interface splits the scores by Group.
# The value recorded below predates this change; re-run to refresh it.
wilcox.test(infovis.phase5 ~ Group, data = bogota_23)$p.value
[1] 5.408198e-21
# Box plots of the phase-5 scores by Group for the Group 2/3 subset, with the
# boxes colored by Group.
library(ggpubr)

# Overall score.
ggboxplot(
  bogota_23,
  x = "Group", y = "total.phase5",
  color = "Group",
  xlab = "Group", ylab = "total.phase5"
)

# Overall score again (this chunk appears twice in the notebook).
ggboxplot(
  bogota_23,
  x = "Group", y = "total.phase5",
  color = "Group",
  xlab = "Group", ylab = "total.phase5"
)

# Novelty score.
ggboxplot(
  bogota_23,
  x = "Group", y = "novelty.phase5",
  color = "Group",
  xlab = "Group", ylab = "novelty.phase5"
)

# User-requirement score.
ggboxplot(
  bogota_23,
  x = "Group", y = "user.requirement.phase5",
  color = "Group",
  xlab = "Group", ylab = "user.requirement.phase5"
)

# Tech score.
ggboxplot(
  bogota_23,
  x = "Group", y = "tech.phase5",
  color = "Group",
  xlab = "Group", ylab = "tech.phase5"
)
# Box plot of infovis.phase5 by Group for the Group 2/3 subset, colored by
# Group (ggboxplot comes from ggpubr).
library("ggpubr")
Loading required package: ggplot2
# infovis.phase5 distribution per group, one colored box per Group.
ggboxplot(
  bogota_23,
  x = "Group", y = "infovis.phase5",
  color = "Group",
  xlab = "Group", ylab = "infovis.phase5"
)
# Kruskal-Wallis rank-sum test of novelty.phase5 between Groups 2 and 3.
kruskal.test( novelty.phase5 ~ Group, data = bogota_23)
Kruskal-Wallis rank sum test
data: novelty.phase5 by Group
Kruskal-Wallis chi-squared = 0.85845, df = 1, p-value = 0.3542
# Kruskal-Wallis rank-sum test of total.phase5 between Groups 2 and 3.
kruskal.test( total.phase5 ~ Group, data = bogota_23)
Kruskal-Wallis rank sum test
data: total.phase5 by Group
Kruskal-Wallis chi-squared = 1.5356, df = 1, p-value = 0.2153
# Box-Cox analysis of novelty.phase5 on the Group 2/3 subset.
bogota_23

# Box-Cox needs a strictly positive response, so exact zeros are swapped for
# a tiny positive placeholder.
bogota_23$novelty.phase5[bogota_23$novelty.phase5 == 0] <- 0.00000000001

library(MASS)

# Profile the Box-Cox log-likelihood for lambda in [-3, 3] and keep the
# lambda at which it peaks.
bcmle <- boxcox(lm(novelty.phase5 ~ Group, data = bogota_23),
                lambda = seq(-3, 3, by = 0.1))
lambda <- bcmle$x[which.max(bcmle$y)]
lambda

# Refit with the response raised to the estimated power. The exponent is the
# computed `lambda` instead of a hard-coded 0.1515, so the model stays in step
# with the estimate if the data or the lambda grid changes.
fullmodel.inv <- lm(novelty.phase5^lambda ~ Group, data = bogota_23)
plot(fullmodel.inv)

# Residual normality: transformed novelty model, then the untransformed
# novelty and total-score models for comparison.
shapiro.test(residuals(fullmodel.inv))
shapiro.test(residuals(lm(novelty.phase5 ~ Group, data=bogota_23)))
shapiro.test(residuals(lm(total.phase5 ~ Group, data=bogota_23)))
# Box plot of tech.phase5 by Group for the Group 2/3 subset, colored by Group.
library(ggpubr)
ggboxplot(
  bogota_23,
  x = "Group", y = "tech.phase5",
  color = "Group",
  xlab = "Group", ylab = "tech.phase5"
)

# Print the infovis scores of the subset.
bogota_23[["infovis.phase5"]]

# Kruskal-Wallis rank-sum test of novelty.phase5 between Groups 2 and 3.
kruskal.test(novelty.phase5 ~ Group, data = bogota_23)